org 100h ; assume cs<=0x2a18 [0xfd..ff]=0

T1 equ 27*4
T2 equ 19*4
T3 equ -32*4
T4 equ 23*4

C equ $+8

; ds is moved: all  constant access is [ss:bp+?]
%define w(xx) word[byte bp+si-0x100+xx]
%define d(xx) dword[byte bp+si-0x100+xx]

  push 0xa000   ;<-[bp+si] points here
; the 4 most significant bytes of qword[bp+di] is TAN
  lds bp,[si-3] ; bp=0, ds=0x6800  table: cos
  pop es              ; es=0xa000  screen
  mov ax,0x4f02
  mov gs,ax           ; gs=0x4f02  table: 1 + 2*cos
  mov bx,0x10e
  int 10h    ; 320x200 with 65536 colors; assume it's ok (ax=0x004f)

  add ax,0x39c9 ; 10 05 c9 39, should be "db 0f c9 39"
TABLE_STEP equ $-4  ; 0.000383495197 = 2pi / 16384 ~ 1 / (256 * pi^2)
  mov fs,ax          ; fs=0x3a18  table: color_mul/cos

;Cos table with 16384 entries
;  fninit
COS_TAB:
  imul bx,[bp+di],4 ; bx=[ss:bp+di]=[ss:-2]=angle (0 on init)
  fild word[bp+di]
  fmul d(TABLE_STEP)
  fcos           ;; cos(angle/65536*2pi): adjust period to 2pi
  fst dword[bx]

  fld1
  fadd st1
  fadd st1
  fstp dword[gs:bx] ; 1 + 2*cos(...)

  fldpi           ; color_mul = 1.44
  fdivrp st1,st0  ; color_mul / cos(...)
  fstp dword[fs:bx]
  inc word[bp+di] ; next angle
  jnz COS_TAB     ; bx=4

; Frame loop
M: ; bp=0 cx=timer
  inc cx

  imul bx,cx,T2
  fld dword[bx]         ;; cos(t2)
  fidiv w(CZOOM)        ;; zoom_wave = cos(t2)/zoom

  imul bx,cx,T1
  fmul dword[bx]        ;; cos(t1) * zoom_wave
  fsubr dword[bx]       ;; C=cos(t1) * (1-zoom_wave)
  fstp d(C)
  fld dword[bx-0x4000]  ;; sin(t1)
  fdiv dword[bx]        ;; TAN=sin(t1)/cos(t1)
  fstp qword[bp+si]     ; store it to a nice place

; Pixel loop

X mov ax,0xcccd
  mul di
  add dx,0x9c80
  xchg ax,bx
  pusha ; [-18-16-14-12-10 -8 -6 -4]
        ;   di si bp sp bx dx cx ax
        ;                  yy
        ;                x x
  mov ax,10         ; ax = number of iterations
;  mov ax,6

CZOOM equ $-2  ; per-iteration scale amplitude
  call IT
  popa

  pusha
  add di,di
  jnz NZ_DI
  push ax        ; fix the pixel in the center?
  mov ax,0x4f05  ; each line: set window, assume 64kB granularity
  cwd
  adc dx,dx
  xor bx,bx      ; bh=0 bl=window=0 dx=page
  int 10h
  pop ax
NZ_DI:
  stosw
;  stosw   ; 2x faster: draw two pixels
  popa
;  inc di
  inc di
  jnz X

  in al,60h ; ESC check
  cmp al,1
  jne M     ; exit later

IT:
  ; [-18-16-14-12-10 -8 -6 -4]
  ;   di si bp sp bx dx cx ax
  ;                  yy
  ;                x x

Z fldz
  inc bp
  jpo Z   ; loop 3x  ;; R=0 G=0 B=0
  dec bp
XY:
  dec bp            ; bp:1,0 -> [-8],[-9] -> y,x

;  fldl2e
;  fimul word[bp-9] ;; x[-47274..47274] y R G B
  fild word[bp-9]
  fadd st0          ;; x[-65536..65536] y R G B

  jpo XY  ; loop 2x, bp=0 again

  call LEN
  imul di,[bp+si],4 ; di = d = 65536/2pi * length(x,y)/2

  imul bx,cx,T3
  imul dx,cx,T4

  sub dx,di         ; dx = t4-d
  add di,bx         ; di = d-t3

  mov cx,0x8025  ; shift length, ~0x8000 xor constant, RGB phase shift (0x25)

; square fold, rotate and scale
  ;[x] = [C -S]*[x-round(x)]
  ;[y]   [S  C] [y-round(y)]
R fld st1         ;; y x y R G B    | x Sy x Cy R G B

  fistp dword[bp+si] ; wrap: keep only bottom 16 bits
  xor word[bp+si],cx ;~0x8000
  fild word[bp+si] ;; y = y-round(y) | x = x-round(x)

  fmul d(C)        ;; Cy x y R G B   | Cx Sy x Cy R G B
  fst st2          ;; Cy x Cy R G B  | Cx Sy Cx Cy R G B
  fmul qword[bp+si]; multiply by TAN
  neg al           ;; Sy x Cy R G B  | Sx Sy Cx Cy R G B
  js R  ; al!=0
  faddp st3,st0  ;; Sy Cx Sx+Cy R G B
  fsubp st1,st0  ;; x=Cx-Sy y=Sx+Cy R G B

; interfering concentric circles

; subroutine: compute length of 2D vector, scale to access cos table
LEN: ;; x y -> [bp+si] = sqrt(x*x+y*y)/65536/2 * 16384/2pi = sqrt(x*x+y*y)*C = sqrt(C^2*(x*x+y*y))
  fld st1
  fmul st0
  fld st1
  fmul st0
  faddp
  fmul d(TABLE_STEP)  ; exact: (16384/2pi/65536/2)^2 = 0.000395785+
  fsqrt
  fistp word[bp+si]
  jnz SKIP
  ret
SKIP:

  imul bx,[bp+si],10*4 ; 65536/2pi * 5*length(x,y)
F fld dword[fs:bx+di] ;; k=color_mul/cos(5*length(x,y) + d - t3) x y R G B
  inc bp
  jpo F ; loop 3x     ;; k k k x y R G B

; RGB += k * ( 0.5 + cos(3*(t4 - d + i/100) + [0 0.9 1.8]) );
  imul bx,ax,26*4     ; bx = q = 65536/2pi * (i/100
  add bx,dx           ;                       +t4-d
  imul bx,3           ;                      ) * 3

G fmul dword[gs:bx] ;; k*(1+2cos(q)) k k x y R G B
  faddp st5,st0     ;; k k x y R+=k*(1+2cos(q)) G B
  sub bh,cl;37   ; ~ 0.9 * 256/2pi
  dec bp
  jpo G  ; loop 3x ;; x y [R G B]+=k*(1+2cos(q+[0 0.9 1.8]))

  dec ax
  jnz R   ; ax=0

  fcompp            ;; R G B

; Assemble RGB into 16-bit color.
; cl & 0x1f = shift length: cycle 5,6,5
COL:
  fmul st0          ;; R^2 G^2 B^2
  fistp word[bp+si] ; if it's > 0x7fff, store 0x8000
  imul bx,[bp+si],2 ; double, set carry if it was > 0x3fff
  sbb bx,bp         ; overflow -> 0xffff
  shld [bp-4],bx,cl ; rrrrrggggggbbbbb
  xor cl,5^6
  inc si
  jpo COL ; loop 3x

  ret
